# -*- coding: utf-8 -*-
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from invest.items import InvestItem


class InvSpider(CrawlSpider):
    """Spider for the investment section (``/inv/``) of zdb.pedaily.cn.

    The crawl starts from two listing pages, keeps following further
    listing pages (``/inv/p<digit>...``) and passes every detail page
    (``/inv/show<digits>/``) to :meth:`parse_item`, which yields one
    ``InvestItem`` per page.
    """

    name = 'inv'
    start_urls = [
        'http://zdb.pedaily.cn/inv/p1/',
        'http://zdb.pedaily.cn/inv/p2/',
    ]
    # The patterns are raw strings so that ``\.`` and ``\d`` reach the regex
    # engine untouched instead of being invalid string escape sequences.
    rules = (
        # Detail pages are scraped by parse_item.
        Rule(LinkExtractor(allow=r'http://zdb\.pedaily\.cn/inv/show\d+/'),
             callback='parse_item'),
        # Listing pages have no callback; they are only a source of links.
        Rule(LinkExtractor(allow=r'http://zdb\.pedaily\.cn/inv/p\d')),
    )

    def parse_item(self, response):
        """Build one ``InvestItem`` from a detail page.

        Every field is read from the ``<li>`` children of the ``<ul>``
        inside ``<div class="info">``:

        * ``financiers`` - link text in li[1]; if that is empty or missing,
          the list of li[1]'s own text nodes.
        * ``investors``  - list of link texts in li[2]; if that list is
          empty, the first text node of li[2].
        * ``amount``     - list of text nodes of any classed ``<span>`` in li[3].
        * ``rd``         - first text of ``<span class="b round">`` in li[4].
        * ``date``       - first text node of li[5].
        * ``category``   - list of link texts in li[6].

        NOTE(review): ``financiers`` and ``investors`` can each be either a
        string or a list depending on which branch matched; this is kept as
        is because downstream consumers may rely on it - confirm.

        :param response: the downloaded detail page.
        :return: generator yielding a single ``InvestItem``.
        """
        def select(path):
            # All fields live under the same list; only the tail differs.
            return response.xpath('//div[@class="info"]/ul/' + path)

        # ``or`` is used so the plain-text fallback query only runs when the
        # link-based query produced nothing (None, '' or an empty list).
        financiers = (select('li[1]/a/text()').extract_first()
                      or select('li[1]/text()').extract())
        investors = (select('li[2]/a/text()').extract()
                     or select('li[2]/text()').extract_first())

        yield InvestItem(
            financiers=financiers,
            investors=investors,
            amount=select('li[3]/span[@class]/text()').extract(),
            rd=select('li[4]/span[@class="b round"]/text()').extract_first(),
            date=select('li[5]/text()').extract_first(),
            category=select('li[6]/a/text()').extract(),
        )

